Overview

Education For Employment (EFE) is the leading nonprofit that trains youth and links them to jobs across the Middle East and North Africa (MENA). This pivotal region is the hardest place on the planet for youth to get their first job – they are three times more likely to be unemployed than older adults.

EFE is interested in the effectiveness of their programs, particularly whether graduates find stable employment. We have data on about 7,000 participants in almost 500 program cohorts spread across 8 countries. Participants bring diverse skills, interests, and backgrounds. Programs employ a variety of training models and placement policies. How well are different programs working, and for whom?

Data

EFE has a Salesforce database that houses all information about the organization’s programs, participants, and job placement and retention outcomes. The datasets used in this project include:

  • Participant demographics (program applications): Contacts.csv
  • Pre- and post- program participation surveys (job skills, confidence,…): PrePost Surveys.csv
  • Employment information at 3-month intervals for up to 2 years: Employment Status Checks Blind.csv

Data Pre-Processing

Initially, the data exports from Salesforce were pre-processed in the following ways:

  • To protect PII, Contact ID, Program Name, Company Name, and Class Name were all anonymized by replacing the actual IDs/Names with new, numerical IDs. Lookup tables were sent to EFE so that they can bring the actual values back in if company, program, or class name are significant in the analysis.
  • The applications dataset sometimes contained more than one application per person. Information from previous applications was deemed to be not as important, so only the most recent application was retained, and a new column containing the number of applications each person submitted was added. The application data was then joined to the contact data to create a more complete profile for each program participant.
  • Any individuals who were not present in the Employment Status Check dataset were removed from the contact and pre/post survey datasets.

The preprocessing script can be viewed on GitHub so that the deidentification steps can be reproduced with new data exports.

Feature Engineering

Job Placement

The contact dataset contains 8 columns that relate to when each participant obtained employment. These can be collapsed into a single column that gives the time it took for the participant to get placed, or that they were not placed or could not be reached. Below, the new composite column is on the right, and the original job placement columns can be removed.

## Clean the contacts file column names
# Columns 28-37 are renamed by position: one placement column per check-in
# (graduation, then 3/6/9/12 months), each followed by its "_data" companion.
# NOTE(review): this relies on the export keeping these ten columns at
# positions 28-37 -- confirm whenever the export layout changes.
cols.num <- c(28:37)
names(contacts)[cols.num] <- c(
  "pl_grad", "pl_grad_data",
  "pl_3", "pl_3_data",
  "pl_6", "pl_6_data",
  "pl_9", "pl_9_data",
  "pl_12", "pl_12_data"
)

# Convert every renamed column to numeric. lapply() always returns a list with
# one element per column, whereas sapply() changes its return shape (matrix,
# vector or list) depending on the number of rows.
contacts[cols.num] <- lapply(contacts[cols.num], as.numeric)

## Make collapsed job placement month column and columns that indicate retention at each 3 month period after placement

# All derived columns are built in a single mutate(). They are appended to the
# data in the order written here (months_job, sorting, then the 6/3/9/12 month
# retention pairs).
contacts_raw <- contacts %>%
  mutate(
    # Text label of the earliest check-in whose placement column equals 1.
    months_job = ifelse(
      pl_grad == 1, "Placed at Graduation",
      ifelse(
        pl_3 == 1, "Placed at 3 Months",
        ifelse(
          pl_6 == 1, "Placed at 6 Months",
          ifelse(
            pl_9 == 1, "Placed at 9 Months",
            ifelse(
              pl_12 == 1, "Placed at 12 Months",
              ifelse(
                pl_12_data == 1 & pl_12 == 0,
                "Not Placed",
                "Not Reached at 12 Months"
              )
            )
          )
        )
      )
    ),
    # Numeric twin of months_job (1-7), built from the same conditions so the
    # labels can be sorted chronologically.
    sorting = ifelse(
      pl_grad == 1, 1,
      ifelse(
        pl_3 == 1, 2,
        ifelse(
          pl_6 == 1, 3,
          ifelse(
            pl_9 == 1, 4,
            ifelse(
              pl_12 == 1, 5,
              ifelse(pl_12_data == 1 & pl_12 == 0, 6, 7)
            )
          )
        )
      )
    ),

    # Retention at each check-in, once as 1/0/NA and once as a display label.
    # A 0 only counts as "not retained" when the matching data-available
    # column is 1.
    Retention_6_months = ifelse(
      X6.Month.Job.Retention == 1, 1,
      ifelse(
        X6.Month.Job.Retention == 0 & X6.Month.Post.Placement.Data.Avail.YES == 1,
        0, NA
      )
    ),
    `Retention at 6 Months` = ifelse(
      X6.Month.Job.Retention == 1, "Yes",
      ifelse(
        X6.Month.Job.Retention == 0 & X6.Month.Post.Placement.Data.Avail.YES == 1,
        "No", "Data Not Available"
      )
    ),

    Retention_3_months = ifelse(
      X3.Month.Job.Retention == 1, 1,
      ifelse(
        X3.Month.Job.Retention == 0 & X3.Month.Post.Placement.Data.Avail.YES == 1,
        0, NA
      )
    ),
    `Retention at 3 Months` = ifelse(
      X3.Month.Job.Retention == 1, "Yes",
      ifelse(
        X3.Month.Job.Retention == 0 & X3.Month.Post.Placement.Data.Avail.YES == 1,
        "No", "Data Not Available"
      )
    ),

    Retention_9_months = ifelse(
      X9.Month.Job.Retention == 1, 1,
      ifelse(
        X9.Month.Job.Retention == 0 & X9.Month.Post.Placement.Data.Avail.YES == 1,
        0, NA
      )
    ),
    `Retention at 9 Months` = ifelse(
      X9.Month.Job.Retention == 1, "Yes",
      ifelse(
        X9.Month.Job.Retention == 0 & X9.Month.Post.Placement.Data.Avail.YES == 1,
        "No", "Data Not Available"
      )
    ),

    Retention_12_months = ifelse(
      X12.Month.Job.Retention == 1, 1,
      ifelse(
        X12.Month.Job.Retention == 0 & X12.Month.Post.Placement.Data.Avail.YES == 1,
        0, NA
      )
    ),
    `Retention at 12 Months` = ifelse(
      X12.Month.Job.Retention == 1, "Yes",
      ifelse(
        X12.Month.Job.Retention == 0 & X12.Month.Post.Placement.Data.Avail.YES == 1,
        "No", "Data Not Available"
      )
    )
  )



## make data to display in table
# Keep the ten placement columns plus months_job and sorting, take one example
# row per months_job label, order the examples by the sort key, then reduce to
# the columns shown in the table.
months_job <- contacts_raw %>%
  dplyr::select(c(28:37, 107, 108)) %>%
  group_by(months_job) %>%
  slice(1) %>%
  arrange(sorting) %>%
  dplyr::select(c(1, 3, 5, 7, 9, 10))


## create table
datatable(
  months_job,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)
## Keep contacts whose graduation date is on or before 2022-04-21, and drop those placed in a job more than 30 days before graduating
# Rows without a usable number of days until placement are kept.

contactsFiltered <- contacts_raw %>%
  filter(
    as.Date(EFE.Graduation.Date) <= as.Date("2022-04-21"),
    is.na(as.numeric(Number.Days.Until.Job.Placement)) |
      as.numeric(Number.Days.Until.Job.Placement) >= -30
  )

Job Retention

Job retention at 6 months is the initial outcome variable in the analysis. Therefore, if the participant cannot be reached at 6 months after job placement, they are filtered out. Participants that graduated less than 6 months before the data was pulled, and participants that got a job more than 30 days before graduating, are also filtered out. After these steps, of the original 7124 participants in the data, only 2652 remain in the dataset that will be included in the analysis.

Overall, of the 2652 participants there is data for, 47.6% had retained employment 6 months after being placed in a job.

## create data for retention table
# Distinct participants per 6-month retention label.
retentiontable <- summarise(
  group_by(contactsFiltered, `Retention at 6 Months`),
  Participants = n_distinct(ContactID)
)

## create table
datatable(
  retentiontable,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)

Employment Status Check Surveys

The analysis is initially interested in retention at 6 months; therefore the employment status check data will be filtered to contain only the 6 month surveys, and those survey responses can be joined to the participant contact information retaining a 1:1 relationship.

## create numerical encodings for each response option in confidence and self-efficacy surveys

# Each answer is replaced by its position (1-5) on the relevant scale. The
# helpers live inside local() so they do not leak into the workspace.
surveys <- local({
  confidence_scale <- c(
    "Not at all confident", "Unconfident", "Neutral",
    "Confident", "Very confident"
  )
  agreement_scale <- c(
    "Strongly disagree", "Disagree", "Neutral",
    "Agree", "Strongly agree"
  )

  # Position of `answer` on `scale`; answers that are missing or not on the
  # scale become NA.
  scale_position <- function(answer, scale) {
    as.numeric(match(answer, scale))
  }

  PrePost %>%
    mutate(
      ClassID = as.numeric(ClassID),

      # Confidence items and their sum
      ConfidenceTeam = scale_position(
        Confidence.in.Working.on.a.Team, confidence_scale
      ),
      ConfidenceCommunication = scale_position(
        Confidence.in.Communicating.w.Colleague, confidence_scale
      ),
      ConfidenceResolvingProblems = scale_position(
        Confidence.in.Resolving.Problems, confidence_scale
      ),
      ConfidenceJobMatching = scale_position(
        Confidence.in.Finding.Job.Matching.Back., confidence_scale
      ),
      ConfidencePresentingYourself = scale_position(
        Confidence.in.Presenting.Yourself.to.Em., confidence_scale
      ),
      ConfidenceTotal = ConfidenceCommunication + ConfidenceTeam +
        ConfidencePresentingYourself + ConfidenceJobMatching +
        ConfidenceResolvingProblems,

      # Survey stage: "Post" when the record type mentions it, else "Pre"
      Stage = ifelse(str_detect(Survey..Record.Type, "Post"), "Post", "Pre"),

      # Self-efficacy items and their sum
      EfficacyOptimistic = scale_position(
        Optimistic.About.my.Future, agreement_scale
      ),
      EfficacyOvercome = scale_position(
        Work.Seriously.to.Overcome.Challenge, agreement_scale
      ),
      EfficacyQualifications = scale_position(
        Necessary.Qualifications.to.Succeed, agreement_scale
      ),
      EfficacyFindaJob = scale_position(
        Can.Find.a.Job.in.the.Appropriate.Career, agreement_scale
      ),
      EfficacyFamiliarRights = scale_position(
        Familiar.With.Rights.and.Duties, agreement_scale
      ),
      EfficacyTotal = EfficacyFamiliarRights + EfficacyFindaJob +
        EfficacyQualifications + EfficacyOvercome + EfficacyOptimistic
    )
})


# Flatten every column with unlist() and rebuild a plain data frame.
surveys <- as.data.frame(lapply(surveys, unlist))


## Create table of confidence scores and changes in confidence ratings

# One row per participant with the composite confidence score of the "Pre" and
# "Post" survey side by side, plus the absolute and relative change.
Confidence <- surveys %>%
  dplyr::select(ContactID, ConfidenceTotal, Stage) %>%
  pivot_wider(names_from = Stage, values_from = ConfidenceTotal) %>%
  # Keep only participants that have both a pre and a post score.
  filter(Pre != "NULL" & Post != "NULL") %>%
  mutate(
    ConfidenceChange = as.numeric(Post) - as.numeric(Pre),
    # Relative change, formatted as a percentage.
    ConfidencePercent = formattable::percent(
      (as.numeric(Post) - as.numeric(Pre)) / as.numeric(Pre)
    )
  ) %>%
  # Rename by name, not by position: pivot_wider() orders the new columns by
  # the first appearance of each Stage value, so positions 2 and 3 are not
  # guaranteed to be Post and Pre.
  dplyr::rename(ConfidencePre = Pre, ConfidencePost = Post)


# Flatten every column with unlist() and rebuild a plain data frame.
Confidence <- as.data.frame(lapply(Confidence, unlist))


## Create table of self-efficacy scores and changes in self-efficacy ratings

# One row per participant with the composite self-efficacy score of the "Pre"
# and "Post" survey side by side, plus the absolute and relative change.
Efficacy <- surveys %>%
  dplyr::select(ContactID, EfficacyTotal, Stage) %>%
  pivot_wider(names_from = Stage, values_from = EfficacyTotal) %>%
  # Keep only participants that have both a pre and a post score.
  filter(Pre != "NULL" & Post != "NULL") %>%
  mutate(
    EfficacyChange = as.numeric(Post) - as.numeric(Pre),
    # Relative change, formatted as a percentage.
    EfficacyPercent = formattable::percent(
      (as.numeric(Post) - as.numeric(Pre)) / as.numeric(Pre)
    )
  ) %>%
  # Rename by name, not by position: pivot_wider() orders the new columns by
  # the first appearance of each Stage value, so positions 2 and 3 are not
  # guaranteed to be Post and Pre.
  dplyr::rename(EfficacyPre = Pre, EfficacyPost = Post)

# Flatten every column with unlist() and rebuild a plain data frame.
Efficacy <- as.data.frame(lapply(Efficacy, unlist))


## Create dataframe of most significant change answers

# Post-stage survey rows only, reduced to columns 2, 17, 18 and 19.
# NOTE(review): the positions presumably hold the contact id and the
# "most significant change" answers -- confirm against the survey export.
MostSignificantChange <- dplyr::select(
  filter(surveys, Stage == "Post"),
  c(2, 17, 18, 19)
)


## Join confidence, self-efficacy, and most significant change dataframes to the filtered contacts data to produce a dataframe containing the key information from each

# Left joins keep every filtered contact, with or without survey data.
contactsProcessed <- left_join(contactsFiltered, Confidence, by = "ContactID")
contactsProcessed <- left_join(contactsProcessed, Efficacy, by = "ContactID")
contactsProcessed <- left_join(
  contactsProcessed,
  MostSignificantChange,
  by = "ContactID"
)


# Contacts with a known 6-month retention outcome
contactsWithTarget <- filter(contactsProcessed, !is.na(Retention_6_months))

# ... that also have a self-efficacy change score
contactswithEfficacy <- filter(contactsWithTarget, !is.na(EfficacyPercent))

# ... that also have a confidence change score
contactswithConfidence <- filter(contactsWithTarget, !is.na(ConfidencePercent))

# ... that also have a non-missing First.Change value
contactswithChange <- filter(contactsWithTarget, !is.na(First.Change))

#contactsProcessed<-as.data.frame(lapply(contactsProcessed,unlist))

Pre and Post Training Surveys

The pre and post training surveys contain questions around confidence and self-efficacy that EFE is interested in looking into. There are five different questions relating to confidence, with answers on a “not at all confident” to “very confident” scale. These answers are turned into numbers so that a composite index can be created, and so that changes in confidence after participants have been through the training programs can be more easily measured.

793 of the 2652 participants that remain in the filtered dataset have pre/post survey data. A sample of the calculated confidence and self-efficacy change scores is shown below. These are each then joined to the contacts dataset.

Exploratory Data Analysis

Retention Rates by Gender

The retention rates for participants that have data at each time interval are shown below:

## Calculate retention rates by gender

# Long table: one row per participant and check-in (6/3/9/12 months) with a
# known retention value. Columns are selected by name instead of by position
# so the block keeps working when columns are added upstream.
genderLong <- contactsFiltered %>%
  dplyr::select(
    ContactID, Gender,
    Retention_6_months, Retention_3_months,
    Retention_9_months, Retention_12_months
  ) %>%
  pivot_longer(
    starts_with("Retention_"),
    names_to = "TimeScale",
    values_to = "Retained"
  ) %>%
  filter(!is.na(Retained))

# Distinct participants per gender with a retention value at any check-in
genderTotals <- genderLong %>%
  group_by(Gender) %>%
  summarise(Total = n_distinct(ContactID))

# Share retained per gender and check-in, one column per check-in
genderAll <- genderLong %>%
  group_by(Gender, TimeScale) %>%
  summarise(
    Retained = sum(Retained),
    Total = n_distinct(ContactID),
    .groups = "drop_last"
  ) %>%
  mutate(Percent = percent(Retained / Total)) %>%
  arrange(TimeScale) %>%
  dplyr::select(Gender, TimeScale, Percent) %>%
  pivot_wider(names_from = TimeScale, values_from = Percent) %>%
  # Sorted as text, "Retention_12_months" comes first; move it to the end so
  # the columns read 3, 6, 9, 12 months.
  dplyr::relocate(any_of("Retention_12_months"), .after = last_col()) %>%
  left_join(genderTotals, by = "Gender")

datatable(
  genderAll,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)

Retention Rates by Country

The retention rates for participants that have data at each time interval are shown below:

## Calculate retention rates by country

# Long table: one row per participant and check-in (6/3/9/12 months) with a
# known retention value. Columns are selected by name instead of by position
# so the block keeps working when columns are added upstream.
countryLong <- contactsFiltered %>%
  dplyr::select(
    ContactID, `Country of Programming`,
    Retention_6_months, Retention_3_months,
    Retention_9_months, Retention_12_months
  ) %>%
  pivot_longer(
    starts_with("Retention_"),
    names_to = "TimeScale",
    values_to = "Retained"
  ) %>%
  filter(!is.na(Retained))

# Distinct participants per country with a retention value at any check-in
countryTotals <- countryLong %>%
  group_by(`Country of Programming`) %>%
  summarise(Total = n_distinct(ContactID))

# Share retained per country and check-in, one column per check-in
country <- countryLong %>%
  group_by(`Country of Programming`, TimeScale) %>%
  summarise(
    Retained = sum(Retained),
    Total = n_distinct(ContactID),
    .groups = "drop_last"
  ) %>%
  mutate(Percent = percent(Retained / Total)) %>%
  arrange(TimeScale) %>%
  dplyr::select(`Country of Programming`, TimeScale, Percent) %>%
  pivot_wider(names_from = TimeScale, values_from = Percent) %>%
  # Sorted as text, "Retention_12_months" comes first; move it to the end so
  # the columns read 3, 6, 9, 12 months.
  dplyr::relocate(any_of("Retention_12_months"), .after = last_col()) %>%
  left_join(countryTotals, by = "Country of Programming")

# country<-contactsFiltered%>%
#   filter(!is.na(Retention_6_months))%>%
#   group_by(`Country of Programming`,`Retention at 6 Months`)%>%
#   summarise(Participants=n_distinct(ContactID))%>%
#   mutate(Percent=percent(Participants/sum(Participants)),
#          Total=sum(Participants))%>%
#   dplyr::select(1,2,4,5)%>%
#   pivot_wider(names_from = `Retention at 6 Months`,values_from = Percent)


datatable(
  country,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)

Retention Rates by Time of Job Placement

The retention rates for participants that have data at each time interval are shown below:

## Calculate retention rates by time of job placement

# Long table: one row per participant and check-in (6/3/9/12 months) with a
# known retention value. Columns are selected by name instead of by position
# so the block keeps working when columns are added upstream.
placementLong <- contactsFiltered %>%
  dplyr::select(
    ContactID, months_job,
    Retention_6_months, Retention_3_months,
    Retention_9_months, Retention_12_months
  ) %>%
  pivot_longer(
    starts_with("Retention_"),
    names_to = "TimeScale",
    values_to = "Retained"
  ) %>%
  filter(!is.na(Retained))

# Distinct participants per placement time with a retention value at any
# check-in
placementTotals <- placementLong %>%
  group_by(months_job) %>%
  summarise(Total = n_distinct(ContactID))

# Share retained per placement time and check-in, one column per check-in
placement <- placementLong %>%
  group_by(months_job, TimeScale) %>%
  summarise(
    Retained = sum(Retained),
    Total = n_distinct(ContactID),
    .groups = "drop_last"
  ) %>%
  mutate(Percent = percent(Retained / Total)) %>%
  arrange(TimeScale) %>%
  dplyr::select(months_job, TimeScale, Percent) %>%
  pivot_wider(names_from = TimeScale, values_from = Percent) %>%
  # Sorted as text, "Retention_12_months" comes first; move it to the end so
  # the columns read 3, 6, 9, 12 months.
  dplyr::relocate(any_of("Retention_12_months"), .after = last_col()) %>%
  left_join(placementTotals, by = "months_job") %>%
  dplyr::rename(`Placement Time` = months_job)

# placement<-contactsFiltered%>%
#   filter(!is.na(Retention_6_months))%>%
#   group_by(months_job,`Retention at 6 Months`)%>%
#     summarise(Participants=n_distinct(ContactID),
#               Sort=first(sorting))%>%
#   mutate(Percent=percent(Participants/sum(Participants)),
#          Total=sum(Participants))%>%
#   arrange(Sort)%>%
#   dplyr::select(1,2,5,6)%>%
#   pivot_wider(names_from = `Retention at 6 Months`,values_from = Percent)%>%
#   dplyr::rename("Placement Time"=1)


datatable(
  placement,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)

Confidence and Self-Efficacy Scores

The plots below utilize the composite confidence and self-efficacy scores to show overall changes between pre and post surveys. These plots include all participants that had pre and post survey data, not only the ones that have both pre and post survey data and 6 month job retention data.

Do the confidence and self-efficacy scores correlate with retention?

There are actually slight negative correlations between increases in confidence and efficacy scores and retention rates:

## 
##  Pearson's product-moment correlation
## 
## data:  contactsWithTarget$EfficacyChange and contactsWithTarget$Retention_6_months
## t = -2.5096, df = 791, p-value = 0.01228
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.15752412 -0.01937959
## sample estimates:
##         cor 
## -0.08887926

## 
##  Pearson's product-moment correlation
## 
## data:  contactsWithTarget$ConfidenceChange and contactsWithTarget$Retention_6_months
## t = -2.1018, df = 791, p-value = 0.03589
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.143398214 -0.004928606
## sample estimates:
##         cor 
## -0.07452262

Participants who lost their job

A categorical variable was calculated to identify those who had a job at one point, and then lost it. This was calculated by looking at each of the four time intervals (3 months, 6 months, 9 months, and 12 months), and taking all participants with a retention at that stage of “NO,” who also had a placement at any of the preceding time intervals.

## Joining ESC Surveys with contacts who are considered to have lost a job

# The four post-placement status checks used throughout this block
postPlacementChecks <- c(
  "3-Month Post-Placement Status Check",
  "6-Month Post-Placement Status Check",
  "9-Month Post-Placement Status Check",
  "12-Month Post-Placement Status Check"
)

contactsProcessedwithESCH <- contactsProcessed %>%
  filter(!is.na(Retention_6_months))

# A contact counts as having lost a job when retention is "No" at a check-in
# and a placement was recorded at any earlier check-in. Every operator must be
# the element-wise `|`: the scalar `||` only ever looks at a single value and
# raises an error for longer vectors from R 4.3 on.
Lost_job <- contactsProcessedwithESCH %>%
  filter(
    (`Retention at 3 Months` == "No" & (pl_grad == 1)) |
      (`Retention at 6 Months` == "No" & (pl_grad == 1 | pl_3 == 1)) |
      (`Retention at 9 Months` == "No" &
        (pl_grad == 1 | pl_3 == 1 | pl_6 == 1)) |
      (`Retention at 12 Months` == "No" &
        (pl_grad == 1 | pl_3 == 1 | pl_6 == 1 | pl_9 == 1))
  )

Lost_job_contacts <- unique(Lost_job$ContactID)


# Post-placement status checks of the contacts who lost a job
Lost_job_ESC <- ESC %>%
  filter(ContactID %in% Lost_job_contacts) %>%
  filter(Employment.Status.Check.Type %in% postPlacementChecks)


ReasonsForLeaving <- Lost_job_ESC %>%
  dplyr::select(
    ContactID, Employment.Status.Check.Type, First.Reason.Left.Last.Job
  ) %>%
  # One column per status check, holding the reason given at that check
  pivot_wider(
    names_from = Employment.Status.Check.Type,
    values_from = First.Reason.Left.Last.Job
  ) %>%
  # Treat the text "NULL" as missing
  mutate(across(all_of(postPlacementChecks), ~ ifelse(.x == "NULL", NA, .x))) %>%
  # Use the most recent check that has an answer (12, then 9, 6, 3 months)
  mutate(ReasonsforLeaving = ifelse(
    !is.na(`12-Month Post-Placement Status Check`),
    `12-Month Post-Placement Status Check`,
    ifelse(
      !is.na(`9-Month Post-Placement Status Check`),
      `9-Month Post-Placement Status Check`,
      ifelse(
        !is.na(`6-Month Post-Placement Status Check`),
        `6-Month Post-Placement Status Check`,
        `3-Month Post-Placement Status Check`
      )
    )
  )) %>%
  # Collapse each answer to the first listed category it contains. The
  # studying pattern previously read "Focused on studyings", which cannot
  # match the label it maps to.
  mutate(ReasonsforLeaving = ifelse(
    str_detect(ReasonsforLeaving, "Position was temporary"),
    "Position was temporary",
    ifelse(
      str_detect(ReasonsforLeaving, "Benefits"),
      "Benefits",
      ifelse(
        str_detect(ReasonsforLeaving, "Salary"),
        "Salary",
        ifelse(
          str_detect(ReasonsforLeaving, "Job not sufficiently prestigious"),
          "Job not sufficiently prestigious",
          ifelse(
            str_detect(ReasonsforLeaving, "Focused on studying"),
            "Focused on studying",
            ifelse(
              str_detect(ReasonsforLeaving, "Wants to start own business"),
              "Wants to start own business",
              ifelse(
                str_detect(ReasonsforLeaving, "Dismissed from job"),
                "Dismissed from job",
                ifelse(
                  str_detect(ReasonsforLeaving, "Job not matched with educatio"),
                  "Job not matched with education",
                  ifelse(
                    str_detect(ReasonsforLeaving, "Job too far from home"),
                    "Job too far from home",
                    ifelse(
                      str_detect(ReasonsforLeaving, "Position not appropriate"),
                      "Position not appropriate",
                      ifelse(
                        str_detect(ReasonsforLeaving, "Other"),
                        "Other",
                        ifelse(
                          str_detect(ReasonsforLeaving, "NA"),
                          "NA",
                          ReasonsforLeaving
                        )
                      )
                    )
                  )
                )
              )
            )
          )
        )
      )
    )
  ))

Reasons <- unique(ReasonsForLeaving$ReasonsforLeaving)


#unique(ESC$Employment.Status.Check.Type)
# 
# ESC_not_retained<-ESC%>%
#   filter(Employment.Status.Check.Type=="6-Month Post-Placement Status Check")%>%
#   group_by(ContactID)%>%
#   slice(which.max(Survey..Created.Date))
Reasons for Leaving

Using the categorization calculated above for who lost their jobs at any point, the reasons for leaving were looked at. Of 1165 participants who met these criteria, 0 participants had at least one answer in an employment status check survey where they gave a reason for leaving their first job. If there were answers given in multiple employment status checks, the most recent answer was used. A sample of how this was done is below, where “ReasonsforLeaving” is the combined field using the most recent response. Responses were also cleaned to remove stray commas and nested answers.

The frequency of reasons given for leaving the first job is below, with NA and Other being the most frequent, and “Position is Temporary” and “Salary” being the top non-other responses for participants where there is data.

## Calculate counts of reasons for leaving

# Attach the frequency of each reason, order the reasons by that frequency for
# plotting, and keep only the contact id and the reason. The two columns are
# selected by name: the position of ReasonsforLeaving depends on how many
# status-check columns the earlier pivot produced.
ReasonsForLeaving <- ReasonsForLeaving %>%
  unnest(ReasonsforLeaving) %>%
  group_by(ReasonsforLeaving) %>%
  add_count() %>%
  ungroup() %>%
  mutate(
    ReasonsforLeaving = fct_reorder(ReasonsforLeaving, n, .desc = FALSE)
  ) %>%
  dplyr::select(ContactID, ReasonsforLeaving)

## Join in reasons
contactsProcessedReasons <- contactsProcessed %>%
  left_join(ReasonsForLeaving, by = "ContactID")

## Plot reasons

# Horizontal bar chart of reason counts with the count printed beside each bar
ggplot(ReasonsForLeaving, aes(x = ReasonsforLeaving)) +
  coord_flip() +
  geom_text(stat = "count", aes(label = ..count..), hjust = -.5, size = 2.5) +
  geom_bar(fill = "#951B0D") +
  theme_minimal()

Job types for those who retained job at least 6 months
## Calculate rates of retention for different job placement types

# Per placement type: share of participants with each 6-month retention label
# (one column per label) and the total number of participants of that type.
participants <- contactsFiltered %>%
  filter(!is.na(Retention_6_months)) %>%
  group_by(Earliest.Job.Placement.Type, `Retention at 6 Months`) %>%
  summarise(Participants = n_distinct(ContactID)) %>%
  mutate(
    Percent = percent(Participants / sum(Participants)),
    Total = sum(Participants)
  ) %>%
  dplyr::select(-Participants) %>%
  pivot_wider(
    names_from = `Retention at 6 Months`,
    values_from = Percent
  ) %>%
  dplyr::rename(`Job Placement Type` = Earliest.Job.Placement.Type)


# Contacts with a known 6-month retention outcome
participantsCount <- filter(contactsFiltered, !is.na(Retention_6_months))

#n_distinct(participantsCount$ContactID[participantsCount$Retention_6_months==1])


datatable(
  participants,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)

Of the 1264 participants who retained their jobs at least 6 months, the breakdown of different job types that were retained is below:

## Count job types of those retained past 6 months

# Rows retained at the 6, 9 or 12 month check-in, with the row count of each
# placement type attached as `n` and the types ordered by that count.
JobTypesRetained <- contactsProcessedReasons %>%
  filter(
    Retention_6_months == 1 |
      Retention_9_months == 1 |
      Retention_12_months == 1
  ) %>%
  add_count(Earliest.Job.Placement.Type) %>%
  mutate(
    Earliest.Job.Placement.Type = fct_reorder(
      Earliest.Job.Placement.Type, n,
      .desc = FALSE
    )
  )

#levels(JobTypesRetained$Earliest.Job.Placement.Type)
#JobTypesRetained$Earliest.Job.Placement.Type

# Horizontal bar chart of placement types with the count beside each bar
ggplot(JobTypesRetained, aes(x = Earliest.Job.Placement.Type)) +
  coord_flip() +
  geom_text(stat = "count", aes(label = ..count..), hjust = -.5, size = 2.5) +
  geom_bar(fill = "#951B0D") +
  theme_minimal()

Modeling

A series of models will be tested to determine whether there are features that are important in whether or not participants retained the job they were placed in after 6 months.

# 
# contactsProcessed$Number.Days.Until.Job.Placement<-as.numeric(contactsProcessed$Number.Days.Until.Job.Placement)
# 
# contactsProcessed$Number.Days.Retained.Job<-as.numeric(contactsProcessed$Number.Days.Retained.Job)
# 
# contactsProcessed$Number.Family.Members<-as.numeric(contactsProcessed$Number.Family.Members)
# 
# contactsProcessed$X..Of.Days.Continuously.Working<-as.numeric(contactsProcessed$X..Of.Days.Continuously.Working)
# 
# contactsProcessed$Number.Family.Members.Working<-as.numeric(contactsProcessed$Number.Family.Members.Working)
# 
# contactsProcessed$Number.of.Previous.Jobs<-as.character(contactsProcessed$Number.of.Previous.Jobs)
# 
# contactsProcessed$Monthly.Family.Income<-as.numeric(contactsProcessed$Monthly.Family.Income)
# 
# contactsProcessed$Job.Tenure.in.Months..Second.<-as.numeric(contactsProcessed$Job.Tenure.in.Months..Second.)
# 
# contactsProcessed$Job.Tenure.in.Months..Third.<-as.numeric(contactsProcessed$Job.Tenure.in.Months..Third.)
# 
# contactsProcessed$Job.Tenure.in.Months..First.<-as.numeric(contactsProcessed$Job.Tenure.in.Months..First.)
# 
# contactsProcessed$First.Previous.Job.Salary<-as.numeric(contactsProcessed$First.Previous.Job.Salary)
# 
# contactsProcessed$Second.Previous.Job.Salary<-as.numeric(contactsProcessed$Second.Previous.Job.Salary)
# 
# contactsProcessed$Third.Previous.Job.Salary<-as.numeric(contactsProcessed$Third.Previous.Job.Salary)
# 
# # 
# contactsFiltered$Third.Previous.Job.Salary<-as.numeric(contactsFiltered$Third.Previous.Job.Salary)
# 

# 
# contactsProcessed<-contactsFiltered%>%
#   left_join(Confidence,by="ContactID")%>%
#   left_join(Efficacy,by="ContactID")
#   
# 
# 
# # 
#  setwd("C:/Users/rcarder/Desktop")
#  write.csv(contactsProcessed,"ContactsProcessed.csv",row.names = FALSE)
# # 
# # 
#